{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "计算特征值和特征向量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.940969  , 0.12393688])"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "iris = pd.read_csv(\"http://image.cador.cn/data/iris.csv\")\n",
    "corr = iris.corr()\n",
    "\n",
    "# 将corr进行分块，1:2两个变量一组，3:4是另外一组，并进行两两组合\n",
    "X11 = corr.iloc[0:2,0:2]\n",
    "X12 = corr.iloc[0:2,2:4]\n",
    "X21 = corr.iloc[2:4,0:2]\n",
    "X22 = corr.iloc[2:4,2:4]\n",
    "\n",
    "# 按公式求解矩阵A和B\n",
    "A=np.matmul(np.matmul(np.matmul(np.linalg.inv(X11), X12),np.linalg.inv(X22)),X21)\n",
    "B=np.matmul(np.matmul(np.matmul(np.linalg.inv(X22), X21),np.linalg.inv(X11)),X12)\n",
    "\n",
    "# 求解典型相关系数\n",
    "A_eig_values,A_eig_vectors = np.linalg.eig(A)\n",
    "B_eig_values,B_eig_vectors = np.linalg.eig(B)\n",
    "np.sqrt(A_eig_values)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "比较A与XΛX^(-1)是否相等"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Petal.Length</th>\n",
       "      <th>Petal.Width</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Sepal.Length</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sepal.Width</th>\n",
       "      <td>0.0</td>\n",
       "      <td>-0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              Petal.Length  Petal.Width\n",
       "Sepal.Length           0.0          0.0\n",
       "Sepal.Width            0.0         -0.0"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "round(A-np.matmul(np.matmul(A_eig_vectors,np.diag(A_eig_values)),np.linalg.inv(A_eig_vectors)),5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "比较B与YΛY^(-1)是否相等"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Sepal.Length</th>\n",
       "      <th>Sepal.Width</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Petal.Length</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Petal.Width</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              Sepal.Length  Sepal.Width\n",
       "Petal.Length           0.0          0.0\n",
       "Petal.Width            0.0          0.0"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "round(B-np.matmul(np.matmul(B_eig_vectors,np.diag(B_eig_values)),np.linalg.inv(B_eig_vectors)),5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "验证A对应的典型变量C1其标准差是否为1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Petal.Length   -2.842171e-16\n",
       "Petal.Width    -4.144833e-16\n",
       "dtype: float64"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 将变量分组，并进行标准化处理\n",
    "iris_g1 = iris.iloc[:,0:2]\n",
    "iris_g1 = iris_g1.apply(lambda x:(x - np.mean(x))/np.std(x))\n",
    "iris_g2 = iris.iloc[:,2:4]\n",
    "iris_g2 = iris_g2.apply(lambda x:(x - np.mean(x))/np.std(x))\n",
    "# 求解A对应的特征向量并计算典型向量C1\n",
    "C1 = np.matmul(iris_g1,A_eig_vectors)\n",
    "# 验证C1对应各变量的标准差是否为1，同时查看均值\n",
    "C1.apply(np.std)\n",
    "\n",
    "#  Sepal.Length    1.041196\n",
    "#  Sepal.Width     0.951045\n",
    "#  dtype: float64\n",
    "\n",
    "C1.apply(np.mean)\n",
    "\n",
    "# Sepal.Length   -1.894781e-16\n",
    "# Sepal.Width    -9.000208e-16\n",
    "# dtype: float64\n",
    "\n",
    "# 由于均值为0，标准差不为1，这里对特征向量进行伸缩变换\n",
    "eA=np.matmul(A_eig_vectors,np.diag(1/C1.apply(np.std)))\n",
    "\n",
    "# 再次验证方差和均值\n",
    "C1 = np.matmul(iris_g1,eA)\n",
    "C1.apply(np.std)\n",
    "\n",
    "# Sepal.Length    1.0\n",
    "# Sepal.Width     1.0\n",
    "# dtype: float64\n",
    "\n",
    "C1.apply(np.mean)\n",
    "\n",
    "# Sepal.Length   -1.894781e-16\n",
    "# Sepal.Width    -9.000208e-16\n",
    "# dtype: float64\n",
    "\n",
    "#可见，特征向量已经满足要求，同理对B可得\n",
    "C2 = np.matmul(iris_g2,B_eig_vectors)\n",
    "C2.apply(np.std)\n",
    "\n",
    "# Petal.Length    0.629124\n",
    "# Petal.Width     0.200353\n",
    "# dtype: float64\n",
    "\n",
    "C2.apply(np.mean)\n",
    "\n",
    "# Petal.Length   -1.421085e-16\n",
    "# Petal.Width    -7.993606e-17\n",
    "# dtype: float64\n",
    "\n",
    "eB=np.matmul(B_eig_vectors,np.diag(1/C2.apply(np.std)))\n",
    "C2 = np.matmul(iris_g2,eB)\n",
    "C2.apply(np.std)\n",
    "\n",
    "# Petal.Length    1.0\n",
    "# Petal.Width     1.0\n",
    "# dtype: float64\n",
    "\n",
    "C2.apply(np.mean)\n",
    "\n",
    "# Petal.Length   -2.842171e-16\n",
    "# Petal.Width    -4.144833e-16\n",
    "#dtype: float64"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "验证C1和C2相关性"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Sepal.Length</th>\n",
       "      <th>Sepal.Width</th>\n",
       "      <th>Petal.Length</th>\n",
       "      <th>Petal.Width</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Sepal.Length</th>\n",
       "      <td>1.00000</td>\n",
       "      <td>-0.00000</td>\n",
       "      <td>0.94097</td>\n",
       "      <td>-0.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sepal.Width</th>\n",
       "      <td>-0.00000</td>\n",
       "      <td>1.00000</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.12394</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Petal.Length</th>\n",
       "      <td>0.94097</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>1.00000</td>\n",
       "      <td>0.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Petal.Width</th>\n",
       "      <td>-0.00000</td>\n",
       "      <td>0.12394</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>1.00000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              Sepal.Length  Sepal.Width  Petal.Length  Petal.Width\n",
       "Sepal.Length       1.00000     -0.00000       0.94097     -0.00000\n",
       "Sepal.Width       -0.00000      1.00000       0.00000      0.12394\n",
       "Petal.Length       0.94097      0.00000       1.00000      0.00000\n",
       "Petal.Width       -0.00000      0.12394       0.00000      1.00000"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "round(pd.concat([C1,C2],axis=1).corr(),5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "求解典型相关系数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Sepal.Length</th>\n",
       "      <th>Sepal.Width</th>\n",
       "      <th>Petal.Length</th>\n",
       "      <th>Petal.Width</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Sepal.Length</th>\n",
       "      <td>1.00000</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.94097</td>\n",
       "      <td>-0.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sepal.Width</th>\n",
       "      <td>0.00000</td>\n",
       "      <td>1.00000</td>\n",
       "      <td>-0.00001</td>\n",
       "      <td>0.12394</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Petal.Length</th>\n",
       "      <td>0.94097</td>\n",
       "      <td>-0.00001</td>\n",
       "      <td>1.00000</td>\n",
       "      <td>-0.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Petal.Width</th>\n",
       "      <td>-0.00000</td>\n",
       "      <td>0.12394</td>\n",
       "      <td>-0.00000</td>\n",
       "      <td>1.00000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              Sepal.Length  Sepal.Width  Petal.Length  Petal.Width\n",
       "Sepal.Length       1.00000      0.00000       0.94097     -0.00000\n",
       "Sepal.Width        0.00000      1.00000      -0.00001      0.12394\n",
       "Petal.Length       0.94097     -0.00001       1.00000     -0.00000\n",
       "Petal.Width       -0.00000      0.12394      -0.00000      1.00000"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.cross_decomposition import CCA\n",
    "cca = CCA(n_components=2)\n",
    "cca.fit(iris_g1,iris_g2)\n",
    "# X_c与Y_c分别为转换之后的典型变量\n",
    "X_c, Y_c = cca.transform(iris_g1, iris_g2)\n",
    "round(pd.concat([pd.DataFrame(X_c,columns=iris_g1.columns),\n",
    "                 pd.DataFrame(Y_c,columns=iris_g2.columns)],axis=1).corr(),5)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
